import urllib.request

import lxml
import pandas as pd
import requests
from bs4 import BeautifulSoup

def getHtml(url):
    """Fetch *url* and return the raw response body as bytes.

    The original left the urlopen response object unclosed; use it as a
    context manager so the underlying connection is always released.
    """
    with urllib.request.urlopen(url) as response:
        return response.read()


def saveHtml(file_name, file_content):
    """Write *file_content* (bytes) to ``<file_name>.html`` in the
    current directory.

    ``/`` is a forbidden character in Windows file names, so any slash in
    *file_name* is replaced with an underscore before the file is created.
    """
    safe_name = file_name.replace('/', '_')
    # The content is already bytes, so the file is opened in binary mode.
    with open(safe_name + ".html", "wb") as out:
        out.write(file_content)


# --- Script flow: fetch the epidemic page, snapshot it, parse it ---

# Fetch the Baidu epidemic-report page (raw bytes).
html = getHtml("https://voice.baidu.com/act/newpneumonia/newpneumonia/?from=osari_aladin_banner#tab4")
# Keep a local snapshot as text1.html (handy for offline debugging).
saveHtml("text1", html)

# Parse the bytes we already hold in memory. The original re-read the
# snapshot through a hard-coded absolute path
# (file:///D:/neusoft/Aanaconda/pagetohtml/text1.html), which breaks on any
# other machine, and it also built a first BeautifulSoup object that was
# immediately discarded (leaking the file handle of a bare open()).
soup = BeautifulSoup(html, "html.parser")

# Every <table> on the page; the extraction loop below skips the first one.
tables = soup.find_all('table')

# Expected column order of each data row in the epidemic tables:
# region, new cases, active, cumulative, cured, deaths.
# NOTE(review): header=False below means these names are never written to
# the CSV — they only fix the expected row width.
COLUMN_NAMES = ['疫情地区', '新增', '现有', '累计', '治愈', '死亡的顺序']
OUTPUT_CSV = "D:/neusoft/Aanaconda/pagetohtml/xingaun.csv"

# Skip the first table (page layout/summary) and walk the data tables.
for tab in tables[1:]:
    rows = []
    # The first two <tr>s of each table are header rows — skip them.
    for tr in tab.find_all('tr')[2:]:
        cells = [td.get_text() for td in tr.find_all('td')]
        # Original bug: only the first two <td>s were kept while six column
        # names were declared, so pd.DataFrame(columns=..., data=...) raised
        # "6 columns passed, passed data had 2 columns". Keep every cell and
        # normalize each row to the declared width (pad with '', truncate).
        rows.append((cells + [''] * len(COLUMN_NAMES))[:len(COLUMN_NAMES)])

    # One append per table instead of one DataFrame and one file append per
    # row — same file contents, far fewer writes.
    # NOTE(review): the original called .format(sample_name) on a path with
    # no "{}" placeholder — a no-op — so all tables go to this single CSV;
    # the per-table label parsed from the first <a> was never actually used
    # and has been dropped here.
    if rows:
        data = pd.DataFrame(data=rows, columns=COLUMN_NAMES)
        data.to_csv(OUTPUT_CSV, mode='a', header=False, encoding='utf-8')

